# Load the county-level table of normalized food-production pressures joined
# with health indicators (3133 rows x 65 columns per the readr message below).
sum_4_normalized_pressures_health_selective <-
  here("data/sum_4_normalized_pressures_health_selective.csv") %>%
  read_csv()
## Rows: 3133 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): fips, state, county
## dbl (62): sum_1_disturbance, sum_1_water, sum_1_ghg, sum_1_nutrient, sum_4_c...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

6 States with highest cumulative pressure of food production

# One row per state: total cumulative pressure, median rural share, median
# county household income, and total population.
# NOTE: none of the aggregates set na.rm, so a single NA county value makes
# the corresponding state figure NA.
state_data <- summarise(
  group_by(sum_4_normalized_pressures_health_selective, state),
  cum_pressure = sum(sum_4_cumulative),
  median_rural = median(per_rural),
  median_state_income = median(median_household_income),
  state_pop = sum(population)
)
# Keep only the counties of the six states treated as having the highest
# cumulative pressure. The state list is hard-coded rather than derived from
# state_data.
# "Illinois" was previously spelled "Illionis", which would match no rows if
# the data use the standard spelling and so silently dropped that state.
# NOTE: the outputs recorded further down pre-date this correction and need
# to be regenerated.
sum_4_normalized_pressures_health_selective_top_states <-
  sum_4_normalized_pressures_health_selective %>%
  filter(
    state %in% c(
      "Iowa", "Illinois", "Nebraska", "Indiana", "Minnesota", "Kansas"
    )
  )
# Compare the distribution of county cumulative pressure in the top-state
# subset against all counties (median, max, and the subset minimum).
median(sum_4_normalized_pressures_health_selective_top_states[["sum_4_cumulative"]])
## [1] 0.003039106
median(sum_4_normalized_pressures_health_selective[["sum_4_cumulative"]])
## [1] 0.0007026671
max(sum_4_normalized_pressures_health_selective_top_states[["sum_4_cumulative"]])
## [1] 0.009332784
max(sum_4_normalized_pressures_health_selective[["sum_4_cumulative"]])
## [1] 0.009332784
min(sum_4_normalized_pressures_health_selective_top_states[["sum_4_cumulative"]])
## [1] 0.000003166402
# Keep only top-state counties that are complete for every response and
# predictor used by the random forests below.
# The previous list named per_freq_mental_distress and per_access_to_exercise
# twice and never listed per_freq_physical_distress, although that column is
# a model response further down; it is now included so that no response can
# carry an NA into a fit.
sum_4_normalized_pressures_health_selective_no_na_top_states <-
  sum_4_normalized_pressures_health_selective_top_states %>%
  drop_na(
    # responses
    per_fair_poor_health, per_low_birthweight, life_expectancy,
    per_freq_mental_distress, per_freq_physical_distress, per_adult_obesity,
    # predictors
    per_rural, median_household_income, population,
    per_access_to_exercise, per_uninsured, primary_care_phys_quartile,
    mental_health_providers_quartile, per_child_poverty,
    air_pollution_avg_daily_pm2.5, drinking_water_violation_quartile,
    per_severe_house_cost_burden, per_severe_housing_problems,
    per_food_insecure, sum_4_cumulative, per_limited_access_healthy_food,
    per_black, per_asian, per_am_indian_alaska_native,
    per_nativeHA_other_pacific_isl, per_hispanic, per_smokers,
    per_physically_inactive, per_excessive_drinking, per_flu_vaccinated,
    per_completed_hs, per_some_college, traffic_volume,
    per_broadband_access
  )
# Random forest of per_fair_poor_health on the 28 county covariates, fitted
# on the complete-case top-state counties. No seed is set in the visible
# code, so a rerun will not reproduce the recorded numbers exactly.
rf_per_fair_poor_health_top_states <- randomForest(
  per_fair_poor_health ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_per_fair_poor_health_top_states)
## 
## Call:
##  randomForest(formula = per_fair_poor_health ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.8094195
##                     % Var explained: 90.86
# Type 2 importance (reported as IncNodePurity) for the fair/poor-health
# forest fitted on the top-state counties.
print(
  importance(rf_per_fair_poor_health_top_states, type = 2)
)
##                                   IncNodePurity
## per_rural                             28.786756
## median_household_income              105.061185
## population                            22.455681
## per_access_to_exercise                20.879370
## per_uninsured                        281.138617
## primary_care_phys_quartile             8.780831
## mental_health_providers_quartile       6.933924
## per_child_poverty                    136.595556
## air_pollution_avg_daily_pm2.5         63.056478
## drinking_water_violation_quartile      3.926460
## per_severe_house_cost_burden          13.720881
## per_severe_housing_problems           33.272707
## per_food_insecure                    161.129332
## sum_4_cumulative                      22.752731
## per_limited_access_healthy_food       17.183330
## per_black                             21.355248
## per_asian                             17.605994
## per_am_indian_alaska_native           41.978807
## per_nativeHA_other_pacific_isl         6.096772
## per_hispanic                         131.882880
## per_smokers                          571.564248
## per_physically_inactive               38.496805
## per_excessive_drinking              1022.186738
## per_flu_vaccinated                    16.855264
## per_completed_hs                     506.712898
## per_some_college                     576.266628
## traffic_volume                        20.863773
## per_broadband_access                  43.284037
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the fair/poor-health forest on the top-state counties.
varImpPlot(rf_per_fair_poor_health_top_states, type = 2)

# Random forest of life_expectancy on the 28 county covariates, fitted on
# the complete-case top-state counties.
rf_life_expectancy_top_states <- randomForest(
  life_expectancy ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_life_expectancy_top_states)
## 
## Call:
##  randomForest(formula = life_expectancy ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 2.830156
##                     % Var explained: 48.59
# Type 2 importance (reported as IncNodePurity) for the life-expectancy
# forest fitted on the top-state counties.
print(
  importance(rf_life_expectancy_top_states, type = 2)
)
##                                   IncNodePurity
## per_rural                              28.84049
## median_household_income               133.88460
## population                             65.14073
## per_access_to_exercise                 55.57058
## per_uninsured                          65.35269
## primary_care_phys_quartile             15.72920
## mental_health_providers_quartile       16.10401
## per_child_poverty                     305.90368
## air_pollution_avg_daily_pm2.5          83.41265
## drinking_water_violation_quartile       8.58231
## per_severe_house_cost_burden           39.62646
## per_severe_housing_problems            47.36174
## per_food_insecure                     197.55617
## sum_4_cumulative                       54.57015
## per_limited_access_healthy_food        54.43319
## per_black                              38.89374
## per_asian                              43.92771
## per_am_indian_alaska_native            75.88630
## per_nativeHA_other_pacific_isl         11.27913
## per_hispanic                           47.47724
## per_smokers                           280.21599
## per_physically_inactive               123.81523
## per_excessive_drinking                178.11039
## per_flu_vaccinated                     64.67701
## per_completed_hs                      118.16497
## per_some_college                       85.65338
## traffic_volume                         49.69682
## per_broadband_access                   89.22437
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the life-expectancy forest on the top-state counties.
varImpPlot(rf_life_expectancy_top_states, type = 2)

# Random forest of per_low_birthweight on the 28 county covariates, fitted
# on the complete-case top-state counties.
rf_per_low_birthweight_top_states <- randomForest(
  per_low_birthweight ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_per_low_birthweight_top_states)
## 
## Call:
##  randomForest(formula = per_low_birthweight ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 1.061943
##                     % Var explained: 29.64
# Type 2 importance (reported as IncNodePurity) for the low-birthweight
# forest fitted on the top-state counties.
print(
  importance(rf_per_low_birthweight_top_states, type = 2)
)
##                                   IncNodePurity
## per_rural                             17.738903
## median_household_income               27.398507
## population                            43.463242
## per_access_to_exercise                22.852275
## per_uninsured                         25.638052
## primary_care_phys_quartile             5.858828
## mental_health_providers_quartile       9.583828
## per_child_poverty                     24.669704
## air_pollution_avg_daily_pm2.5         74.695058
## drinking_water_violation_quartile      4.486703
## per_severe_house_cost_burden          12.339381
## per_severe_housing_problems           13.682084
## per_food_insecure                     32.244399
## sum_4_cumulative                      30.188080
## per_limited_access_healthy_food       16.750043
## per_black                             25.846735
## per_asian                             15.065219
## per_am_indian_alaska_native           18.952179
## per_nativeHA_other_pacific_isl         5.470050
## per_hispanic                          20.926135
## per_smokers                           23.381296
## per_physically_inactive               33.771432
## per_excessive_drinking                30.302415
## per_flu_vaccinated                    17.746845
## per_completed_hs                      16.206792
## per_some_college                      21.767651
## traffic_volume                        41.027023
## per_broadband_access                  16.090605
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the low-birthweight forest on the top-state counties.
varImpPlot(rf_per_low_birthweight_top_states, type = 2)

# Random forest of per_freq_mental_distress on the 28 county covariates,
# fitted on the complete-case top-state counties.
rf_per_freq_mental_distress_top_states <- randomForest(
  per_freq_mental_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_per_freq_mental_distress_top_states)
## 
## Call:
##  randomForest(formula = per_freq_mental_distress ~ per_rural +      median_household_income + population + per_access_to_exercise +      per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile +      per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +      per_severe_house_cost_burden + per_severe_housing_problems +      per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +      per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.2621599
##                     % Var explained: 91.95
# Type 2 importance (reported as IncNodePurity) for the frequent-mental-
# distress forest fitted on the top-state counties.
print(
  importance(rf_per_freq_mental_distress_top_states, type = 2)
)
##                                   IncNodePurity
## per_rural                              5.689955
## median_household_income               36.287496
## population                            10.373301
## per_access_to_exercise                10.064708
## per_uninsured                         82.063035
## primary_care_phys_quartile             1.724042
## mental_health_providers_quartile       1.249132
## per_child_poverty                     24.235228
## air_pollution_avg_daily_pm2.5        118.103790
## drinking_water_violation_quartile      1.498116
## per_severe_house_cost_burden           4.798502
## per_severe_housing_problems            6.509386
## per_food_insecure                    125.687325
## sum_4_cumulative                      15.577046
## per_limited_access_healthy_food        5.260310
## per_black                              5.134872
## per_asian                              9.130129
## per_am_indian_alaska_native           15.149269
## per_nativeHA_other_pacific_isl         2.860719
## per_hispanic                           9.286602
## per_smokers                          367.117895
## per_physically_inactive               16.617621
## per_excessive_drinking               434.437089
## per_flu_vaccinated                     6.927124
## per_completed_hs                      33.953933
## per_some_college                      66.286494
## traffic_volume                         5.918396
## per_broadband_access                  21.274809
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the frequent-mental-distress forest on the top-state counties.
varImpPlot(rf_per_freq_mental_distress_top_states, type = 2)

# Random forest of per_freq_physical_distress on the 28 county covariates,
# fitted on the complete-case top-state counties.
rf_per_freq_physical_distress_top_states <- randomForest(
  per_freq_physical_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_per_freq_physical_distress_top_states)
## 
## Call:
##  randomForest(formula = per_freq_physical_distress ~ per_rural +      median_household_income + population + per_access_to_exercise +      per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile +      per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +      per_severe_house_cost_burden + per_severe_housing_problems +      per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +      per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.282749
##                     % Var explained: 88.65
# Type 2 importance (reported as IncNodePurity) for the frequent-physical-
# distress forest fitted on the top-state counties.
print(
  importance(rf_per_freq_physical_distress_top_states, type = 2)
)
##                                   IncNodePurity
## per_rural                              4.836674
## median_household_income               34.355501
## population                             6.066634
## per_access_to_exercise                 5.449328
## per_uninsured                         71.122487
## primary_care_phys_quartile             2.252014
## mental_health_providers_quartile       1.440958
## per_child_poverty                     43.193670
## air_pollution_avg_daily_pm2.5         27.394455
## drinking_water_violation_quartile      1.161147
## per_severe_house_cost_burden           4.451435
## per_severe_housing_problems           10.383857
## per_food_insecure                     60.300711
## sum_4_cumulative                       9.956034
## per_limited_access_healthy_food        6.323209
## per_black                              4.039418
## per_asian                              6.563057
## per_am_indian_alaska_native           19.773587
## per_nativeHA_other_pacific_isl         1.827321
## per_hispanic                          11.422503
## per_smokers                          303.248499
## per_physically_inactive               10.665358
## per_excessive_drinking               263.721695
## per_flu_vaccinated                     6.092631
## per_completed_hs                      53.067135
## per_some_college                     115.354058
## traffic_volume                         4.549182
## per_broadband_access                  17.400827
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the frequent-physical-distress forest on the top-state counties.
varImpPlot(rf_per_freq_physical_distress_top_states, type = 2)

# Random forest of per_adult_obesity on the 28 county covariates, fitted on
# the complete-case top-state counties.
# NOTE: the object name keeps its original "obesisty" spelling so that the
# references to it elsewhere in the file still resolve.
rf_per_adult_obesisty_top_states <- randomForest(
  per_adult_obesity ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_per_adult_obesisty_top_states)
## 
## Call:
##  randomForest(formula = per_adult_obesity ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 11.74257
##                     % Var explained: 26.67
# Type 2 importance (reported as IncNodePurity) for the adult-obesity
# forest fitted on the top-state counties.
print(
  importance(rf_per_adult_obesisty_top_states, type = 2)
)
##                                   IncNodePurity
## per_rural                             146.61501
## median_household_income               250.76292
## population                            362.04415
## per_access_to_exercise                258.15551
## per_uninsured                         144.63068
## primary_care_phys_quartile            131.35205
## mental_health_providers_quartile       79.65116
## per_child_poverty                     266.87808
## air_pollution_avg_daily_pm2.5         277.33960
## drinking_water_violation_quartile      46.08876
## per_severe_house_cost_burden          141.36550
## per_severe_housing_problems           190.75389
## per_food_insecure                     163.36858
## sum_4_cumulative                      421.20268
## per_limited_access_healthy_food       166.56377
## per_black                             160.72526
## per_asian                             288.91127
## per_am_indian_alaska_native           199.21364
## per_nativeHA_other_pacific_isl         98.38842
## per_hispanic                          264.40879
## per_smokers                           169.74197
## per_physically_inactive               910.00567
## per_excessive_drinking                168.58854
## per_flu_vaccinated                    199.30157
## per_completed_hs                      242.59343
## per_some_college                      549.96623
## traffic_volume                        220.38974
## per_broadband_access                  378.84904
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the adult-obesity forest on the top-state counties.
varImpPlot(rf_per_adult_obesisty_top_states, type = 2)

Country Level (all counties)

# Keep only counties (all states) that are complete for every response and
# predictor used by the random forests.
# The previous list named per_freq_mental_distress and per_access_to_exercise
# twice and never listed per_freq_physical_distress, which is modelled as a
# response elsewhere in this analysis; it is now included so that no response
# can carry an NA into a fit.
# NOTE: if per_freq_physical_distress has missing values this drops
# additional rows, so the recorded outputs would need to be regenerated.
sum_4_normalized_pressures_health_selective_no_na_health_all_counties <-
  sum_4_normalized_pressures_health_selective %>%
  drop_na(
    # responses
    per_fair_poor_health, per_low_birthweight, life_expectancy,
    per_freq_mental_distress, per_freq_physical_distress, per_adult_obesity,
    # predictors
    per_rural, median_household_income, population,
    per_access_to_exercise, per_uninsured, primary_care_phys_quartile,
    mental_health_providers_quartile, per_child_poverty,
    air_pollution_avg_daily_pm2.5, drinking_water_violation_quartile,
    per_severe_house_cost_burden, per_severe_housing_problems,
    per_food_insecure, sum_4_cumulative, per_limited_access_healthy_food,
    per_black, per_asian, per_am_indian_alaska_native,
    per_nativeHA_other_pacific_isl, per_hispanic, per_smokers,
    per_physically_inactive, per_excessive_drinking, per_flu_vaccinated,
    per_completed_hs, per_some_college, traffic_volume,
    per_broadband_access
  )
# Random forest of per_fair_poor_health on the 28 county covariates, fitted
# on the complete-case counties from all states.
rf_per_fair_poor_health_all_counties <- randomForest(
  per_fair_poor_health ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_per_fair_poor_health_all_counties)
## 
## Call:
##  randomForest(formula = per_fair_poor_health ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 1.40382
##                     % Var explained: 94.55
# Type 2 importance (reported as IncNodePurity) for the fair/poor-health
# forest fitted on all counties.
print(
  importance(rf_per_fair_poor_health_all_counties, type = 2)
)
##                                   IncNodePurity
## per_rural                             256.70763
## median_household_income              7946.51901
## population                            342.49930
## per_access_to_exercise                327.56183
## per_uninsured                         606.86633
## primary_care_phys_quartile             69.32211
## mental_health_providers_quartile       69.65844
## per_child_poverty                    9146.88505
## air_pollution_avg_daily_pm2.5         519.55709
## drinking_water_violation_quartile      49.51777
## per_severe_house_cost_burden          160.55338
## per_severe_housing_problems           273.93878
## per_food_insecure                   16556.98117
## sum_4_cumulative                      368.82763
## per_limited_access_healthy_food       205.98351
## per_black                             528.14768
## per_asian                             205.06834
## per_am_indian_alaska_native           290.18232
## per_nativeHA_other_pacific_isl         92.03013
## per_hispanic                         1289.28610
## per_smokers                          3376.88245
## per_physically_inactive               681.15287
## per_excessive_drinking               3904.60062
## per_flu_vaccinated                    297.89889
## per_completed_hs                    21524.75245
## per_some_college                     5405.13556
## traffic_volume                        239.93625
## per_broadband_access                 1712.62939
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the fair/poor-health forest on all counties.
varImpPlot(rf_per_fair_poor_health_all_counties, type = 2)

# Random forest of life_expectancy on the 28 county covariates, fitted on
# the complete-case counties from all states.
rf_life_expectancy_all_counties <- randomForest(
  life_expectancy ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)

# Fit summary (number of trees, mtry, OOB error).
print(rf_life_expectancy_all_counties)
## 
## Call:
##  randomForest(formula = life_expectancy ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 2.841018
##                     % Var explained: 68.04
# Type 2 importance (reported as IncNodePurity) for the life-expectancy
# forest fitted on all counties.
print(
  importance(rf_life_expectancy_all_counties, type = 2)
)
##                                   IncNodePurity
## per_rural                             370.47077
## median_household_income              2177.33275
## population                            553.57872
## per_access_to_exercise                428.64344
## per_uninsured                         376.65377
## primary_care_phys_quartile            103.21559
## mental_health_providers_quartile      123.04834
## per_child_poverty                    2047.38047
## air_pollution_avg_daily_pm2.5         754.69055
## drinking_water_violation_quartile      59.91546
## per_severe_house_cost_burden          360.74254
## per_severe_housing_problems           440.84633
## per_food_insecure                    4108.88469
## sum_4_cumulative                      525.90190
## per_limited_access_healthy_food       473.15581
## per_black                             458.94091
## per_asian                             323.79845
## per_am_indian_alaska_native           518.05550
## per_nativeHA_other_pacific_isl         93.81991
## per_hispanic                          621.69502
## per_smokers                          4908.72589
## per_physically_inactive              1696.87366
## per_excessive_drinking               1271.77056
## per_flu_vaccinated                    368.49241
## per_completed_hs                      871.78751
## per_some_college                      735.83487
## traffic_volume                        374.39356
## per_broadband_access                  719.51820
# Plot the type = 2 variable importance (the IncNodePurity values printed
# above) for the life-expectancy forest on all counties.
varImpPlot(rf_life_expectancy_all_counties, type = 2)

# Random-forest regression of per_low_birthweight on the 28 predictors shared
# by the other forests in this file (including sum_4_cumulative), fitted on the
# all-counties table. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_low_birthweight_all_counties <- randomForest(
  per_low_birthweight ~ per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_low_birthweight_all_counties)
## 
## Call:
##  randomForest(formula = per_low_birthweight ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 1.398634
##                     % Var explained: 66.16
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the all-counties low-birthweight forest.
rf_per_low_birthweight_all_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             174.91256
## median_household_income               710.51930
## population                            295.68525
## per_access_to_exercise                176.45653
## per_uninsured                         162.40290
## primary_care_phys_quartile             53.65224
## mental_health_providers_quartile       52.00121
## per_child_poverty                    1458.75772
## air_pollution_avg_daily_pm2.5         354.02545
## drinking_water_violation_quartile      38.12946
## per_severe_house_cost_burden          259.97561
## per_severe_housing_problems           138.93731
## per_food_insecure                     785.07983
## sum_4_cumulative                      420.10664
## per_limited_access_healthy_food       244.05797
## per_black                            3560.69833
## per_asian                             128.71120
## per_am_indian_alaska_native           347.62696
## per_nativeHA_other_pacific_isl         73.97515
## per_hispanic                          283.76810
## per_smokers                           178.51290
## per_physically_inactive               228.90655
## per_excessive_drinking                757.05533
## per_flu_vaccinated                    156.87790
## per_completed_hs                      389.79011
## per_some_college                      193.33238
## traffic_volume                        180.35702
## per_broadband_access                  268.77203
# Plot the type = 2 variable importance of the all-counties low-birthweight
# forest (same measure as the IncNodePurity table printed above).
varImpPlot(rf_per_low_birthweight_all_counties, type = 2)

# Random-forest regression of per_freq_mental_distress on the 28 predictors
# shared by the other forests in this file (including sum_4_cumulative), fitted
# on the all-counties table. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_freq_mental_distress_all_counties <- randomForest(
  per_freq_mental_distress ~ per_rural + median_household_income +
    population + per_access_to_exercise + per_uninsured +
    primary_care_phys_quartile + mental_health_providers_quartile +
    per_child_poverty + air_pollution_avg_daily_pm2.5 +
    drinking_water_violation_quartile + per_severe_house_cost_burden +
    per_severe_housing_problems + per_food_insecure + sum_4_cumulative +
    per_limited_access_healthy_food + per_black + per_asian +
    per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +
    per_hispanic + per_smokers + per_physically_inactive +
    per_excessive_drinking + per_flu_vaccinated + per_completed_hs +
    per_some_college + traffic_volume + per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_freq_mental_distress_all_counties)
## 
## Call:
##  randomForest(formula = per_freq_mental_distress ~ per_rural +      median_household_income + population + per_access_to_exercise +      per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile +      per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +      per_severe_house_cost_burden + per_severe_housing_problems +      per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +      per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.4737342
##                     % Var explained: 91.44
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the all-counties frequent-mental-distress forest.
rf_per_freq_mental_distress_all_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                              90.16618
## median_household_income              1534.99764
## population                            139.70721
## per_access_to_exercise                 97.17397
## per_uninsured                         102.00067
## primary_care_phys_quartile             23.26137
## mental_health_providers_quartile       23.92665
## per_child_poverty                    1129.56584
## air_pollution_avg_daily_pm2.5         347.75955
## drinking_water_violation_quartile      23.16687
## per_severe_house_cost_burden           63.30955
## per_severe_housing_problems            84.64563
## per_food_insecure                    2713.47027
## sum_4_cumulative                      276.00176
## per_limited_access_healthy_food        63.28291
## per_black                             124.30758
## per_asian                             153.00939
## per_am_indian_alaska_native           149.06624
## per_nativeHA_other_pacific_isl         26.14238
## per_hispanic                          367.08431
## per_smokers                          5598.03309
## per_physically_inactive               187.81551
## per_excessive_drinking               1118.13368
## per_flu_vaccinated                     91.84770
## per_completed_hs                      578.54187
## per_some_college                      979.44020
## traffic_volume                         79.87425
## per_broadband_access                  209.08999
# Plot the type = 2 variable importance of the all-counties
# frequent-mental-distress forest (same measure as the IncNodePurity table
# printed above).
varImpPlot(rf_per_freq_mental_distress_all_counties, type = 2)

# Random-forest regression of per_freq_physical_distress on the 28 predictors
# shared by the other forests in this file (including sum_4_cumulative), fitted
# on the all-counties table. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_freq_physical_distress_all_counties <- randomForest(
  per_freq_physical_distress ~ per_rural + median_household_income +
    population + per_access_to_exercise + per_uninsured +
    primary_care_phys_quartile + mental_health_providers_quartile +
    per_child_poverty + air_pollution_avg_daily_pm2.5 +
    drinking_water_violation_quartile + per_severe_house_cost_burden +
    per_severe_housing_problems + per_food_insecure + sum_4_cumulative +
    per_limited_access_healthy_food + per_black + per_asian +
    per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +
    per_hispanic + per_smokers + per_physically_inactive +
    per_excessive_drinking + per_flu_vaccinated + per_completed_hs +
    per_some_college + traffic_volume + per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_freq_physical_distress_all_counties)
## 
## Call:
##  randomForest(formula = per_freq_physical_distress ~ per_rural +      median_household_income + population + per_access_to_exercise +      per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile +      per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +      per_severe_house_cost_burden + per_severe_housing_problems +      per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +      per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.5217201
##                     % Var explained: 92.28
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the all-counties frequent-physical-distress forest.
rf_per_freq_physical_distress_all_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                              71.54952
## median_household_income              2452.71092
## population                            170.46982
## per_access_to_exercise                 89.78543
## per_uninsured                         125.38818
## primary_care_phys_quartile             27.10406
## mental_health_providers_quartile       27.98624
## per_child_poverty                    2384.82060
## air_pollution_avg_daily_pm2.5         148.33514
## drinking_water_violation_quartile      18.19555
## per_severe_house_cost_burden           66.03350
## per_severe_housing_problems           119.95761
## per_food_insecure                    5058.24880
## sum_4_cumulative                      182.82119
## per_limited_access_healthy_food        74.68558
## per_black                             109.75135
## per_asian                              86.42335
## per_am_indian_alaska_native           154.12568
## per_nativeHA_other_pacific_isl         44.29480
## per_hispanic                          159.34963
## per_smokers                          3751.47080
## per_physically_inactive               134.77968
## per_excessive_drinking                999.76813
## per_flu_vaccinated                    118.78978
## per_completed_hs                     1492.20049
## per_some_college                     1494.20414
## traffic_volume                        103.30618
## per_broadband_access                  386.15283
# Plot the type = 2 variable importance of the all-counties
# frequent-physical-distress forest (same measure as the IncNodePurity table
# printed above).
varImpPlot(rf_per_freq_physical_distress_all_counties, type = 2)

# Random-forest regression of per_adult_obesity on the 28 predictors shared by
# the other forests in this file (including sum_4_cumulative), fitted on the
# all-counties table. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_adult_obesity_all_counties <- randomForest(
  per_adult_obesity ~ per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_adult_obesity_all_counties)
## 
## Call:
##  randomForest(formula = per_adult_obesity ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 16.59649
##                     % Var explained: 52.06
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the all-counties adult-obesity forest.
rf_per_adult_obesity_all_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             2184.3570
## median_household_income               3512.3292
## population                            4714.0214
## per_access_to_exercise                5014.1856
## per_uninsured                         2468.4934
## primary_care_phys_quartile             968.5378
## mental_health_providers_quartile       666.9165
## per_child_poverty                     2479.9364
## air_pollution_avg_daily_pm2.5         2898.8951
## drinking_water_violation_quartile      448.8939
## per_severe_house_cost_burden          2142.9560
## per_severe_housing_problems           1895.6377
## per_food_insecure                     2735.0840
## sum_4_cumulative                      4755.1602
## per_limited_access_healthy_food       1868.3310
## per_black                             6966.6975
## per_asian                             2030.6819
## per_am_indian_alaska_native           2427.4708
## per_nativeHA_other_pacific_isl         623.8955
## per_hispanic                          3164.6175
## per_smokers                           8583.4853
## per_physically_inactive              20665.8010
## per_excessive_drinking                3711.5668
## per_flu_vaccinated                    2156.4085
## per_completed_hs                      1879.5275
## per_some_college                      2890.1156
## traffic_volume                        2382.2113
## per_broadband_access                  4231.7795
# Plot the type = 2 variable importance of the all-counties adult-obesity
# forest (same measure as the IncNodePurity table printed above).
varImpPlot(rf_per_adult_obesity_all_counties, type = 2)

Top 500 Counties

# Filter data for the top 500 counties regarding cumulative pressure
# NOTE(review): 0.002725337 is a hard-coded cutoff on sum_4_cumulative,
# presumably the value that separates the 500 highest-pressure counties from
# the rest -- confirm it still selects 500 rows if the input data changes.
sum_4_normalized_pressures_health_selective_top_counties <-
  sum_4_normalized_pressures_health_selective %>%
  filter(sum_4_cumulative > 0.002725337)

# drop na's
# Keep only counties that have a value for every response and every predictor
# used by the *_top_counties forests. per_freq_physical_distress is listed
# explicitly: it is the response of one of those forests, and randomForest()'s
# default na.action (na.fail) stops on missing values. The previous list named
# per_freq_mental_distress and per_access_to_exercise twice and left
# per_freq_physical_distress out.
sum_4_normalized_pressures_health_selective_no_na_top_counties <-
  sum_4_normalized_pressures_health_selective_top_counties %>%
  drop_na(
    # responses
    per_fair_poor_health, life_expectancy, per_low_birthweight,
    per_freq_mental_distress, per_freq_physical_distress, per_adult_obesity,
    # predictors
    per_rural, median_household_income, population, per_access_to_exercise,
    per_uninsured, primary_care_phys_quartile,
    mental_health_providers_quartile, per_child_poverty,
    air_pollution_avg_daily_pm2.5, drinking_water_violation_quartile,
    per_severe_house_cost_burden, per_severe_housing_problems,
    per_food_insecure, sum_4_cumulative, per_limited_access_healthy_food,
    per_black, per_asian, per_am_indian_alaska_native,
    per_nativeHA_other_pacific_isl, per_hispanic, per_smokers,
    per_physically_inactive, per_excessive_drinking, per_flu_vaccinated,
    per_completed_hs, per_some_college, traffic_volume, per_broadband_access
  )
# Random-forest regression of per_fair_poor_health on the 28 predictors shared
# by the other forests in this file (including sum_4_cumulative), fitted on the
# top-counties subset. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_fair_poor_health_top_counties <- randomForest(
  per_fair_poor_health ~ per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_fair_poor_health_top_counties)
## 
## Call:
##  randomForest(formula = per_fair_poor_health ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 1.193045
##                     % Var explained: 95.2
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the top-counties fair/poor-health forest.
rf_per_fair_poor_health_top_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             34.050826
## median_household_income              672.573842
## population                            54.834333
## per_access_to_exercise                35.297699
## per_uninsured                        104.263849
## primary_care_phys_quartile            10.410936
## mental_health_providers_quartile      15.472407
## per_child_poverty                   1550.614364
## air_pollution_avg_daily_pm2.5        147.024739
## drinking_water_violation_quartile      6.653721
## per_severe_house_cost_burden          37.313590
## per_severe_housing_problems           55.632084
## per_food_insecure                   2302.847218
## sum_4_cumulative                      38.090259
## per_limited_access_healthy_food       37.971868
## per_black                            395.352398
## per_asian                             23.384750
## per_am_indian_alaska_native           46.249213
## per_nativeHA_other_pacific_isl        11.313782
## per_hispanic                         254.484289
## per_smokers                          712.203458
## per_physically_inactive               82.779413
## per_excessive_drinking              1903.806991
## per_flu_vaccinated                    55.041212
## per_completed_hs                    2435.526005
## per_some_college                     995.831350
## traffic_volume                        33.993670
## per_broadband_access                 168.719342
# Plot the type = 2 variable importance of the top-counties fair/poor-health
# forest (same measure as the IncNodePurity table printed above).
varImpPlot(rf_per_fair_poor_health_top_counties, type = 2)

# Random-forest regression of life_expectancy on the 28 predictors shared by
# the other forests in this file (including sum_4_cumulative), fitted on the
# top-counties subset. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_life_expectancy_top_counties <- randomForest(
  life_expectancy ~ per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_life_expectancy_top_counties)
## 
## Call:
##  randomForest(formula = life_expectancy ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 2.068896
##                     % Var explained: 69.93
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the top-counties life-expectancy forest.
rf_life_expectancy_top_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             31.937227
## median_household_income              298.739221
## population                            55.585861
## per_access_to_exercise                41.780710
## per_uninsured                         31.020836
## primary_care_phys_quartile            11.577034
## mental_health_providers_quartile      12.221595
## per_child_poverty                    362.255456
## air_pollution_avg_daily_pm2.5         83.536677
## drinking_water_violation_quartile      9.626732
## per_severe_house_cost_burden          32.851368
## per_severe_housing_problems           39.595218
## per_food_insecure                    619.646898
## sum_4_cumulative                      44.943352
## per_limited_access_healthy_food       40.785632
## per_black                            129.055539
## per_asian                             43.152987
## per_am_indian_alaska_native           46.276526
## per_nativeHA_other_pacific_isl        12.038194
## per_hispanic                          52.550449
## per_smokers                          750.373242
## per_physically_inactive               81.700387
## per_excessive_drinking               184.495571
## per_flu_vaccinated                    59.614529
## per_completed_hs                      69.248357
## per_some_college                      78.649727
## traffic_volume                        40.244411
## per_broadband_access                  81.271277
# Plot the type = 2 variable importance of the top-counties life-expectancy
# forest (same measure as the IncNodePurity table printed above).
varImpPlot(rf_life_expectancy_top_counties, type = 2)

# Random-forest regression of per_low_birthweight on the 28 predictors shared
# by the other forests in this file (including sum_4_cumulative), fitted on the
# top-counties subset. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_low_birthweight_top_counties <- randomForest(
  per_low_birthweight ~ per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_low_birthweight_top_counties)
## 
## Call:
##  randomForest(formula = per_low_birthweight ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 1.114922
##                     % Var explained: 71.91
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the top-counties low-birthweight forest.
rf_per_low_birthweight_top_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             26.761538
## median_household_income              189.406899
## population                            40.036708
## per_access_to_exercise                26.426263
## per_uninsured                         14.611411
## primary_care_phys_quartile             7.270707
## mental_health_providers_quartile       6.079363
## per_child_poverty                    358.728333
## air_pollution_avg_daily_pm2.5         58.344957
## drinking_water_violation_quartile      5.142533
## per_severe_house_cost_burden          27.778285
## per_severe_housing_problems           19.466638
## per_food_insecure                    223.236235
## sum_4_cumulative                      29.366792
## per_limited_access_healthy_food       20.767294
## per_black                            421.531746
## per_asian                             17.717896
## per_am_indian_alaska_native           26.029840
## per_nativeHA_other_pacific_isl         7.151121
## per_hispanic                          24.572842
## per_smokers                           68.019324
## per_physically_inactive               28.911768
## per_excessive_drinking               121.640604
## per_flu_vaccinated                    21.397055
## per_completed_hs                      19.164541
## per_some_college                      25.379371
## traffic_volume                        29.353459
## per_broadband_access                  64.327294
# Plot the type = 2 variable importance of the top-counties low-birthweight
# forest (same measure as the IncNodePurity table printed above).
varImpPlot(rf_per_low_birthweight_top_counties, type = 2)

# Random-forest regression of per_freq_mental_distress on the 28 predictors
# shared by the other forests in this file (including sum_4_cumulative), fitted
# on the top-counties subset. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_freq_mental_distress_top_counties <- randomForest(
  per_freq_mental_distress ~ per_rural + median_household_income +
    population + per_access_to_exercise + per_uninsured +
    primary_care_phys_quartile + mental_health_providers_quartile +
    per_child_poverty + air_pollution_avg_daily_pm2.5 +
    drinking_water_violation_quartile + per_severe_house_cost_burden +
    per_severe_housing_problems + per_food_insecure + sum_4_cumulative +
    per_limited_access_healthy_food + per_black + per_asian +
    per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +
    per_hispanic + per_smokers + per_physically_inactive +
    per_excessive_drinking + per_flu_vaccinated + per_completed_hs +
    per_some_college + traffic_volume + per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_freq_mental_distress_top_counties)
## 
## Call:
##  randomForest(formula = per_freq_mental_distress ~ per_rural +      median_household_income + population + per_access_to_exercise +      per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile +      per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +      per_severe_house_cost_burden + per_severe_housing_problems +      per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +      per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.4369113
##                     % Var explained: 91.76
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the top-counties frequent-mental-distress forest.
rf_per_freq_mental_distress_top_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                              9.805005
## median_household_income               91.055267
## population                            14.696418
## per_access_to_exercise                17.389718
## per_uninsured                         22.939374
## primary_care_phys_quartile             2.665413
## mental_health_providers_quartile       3.063052
## per_child_poverty                    146.304671
## air_pollution_avg_daily_pm2.5        247.526871
## drinking_water_violation_quartile      3.258657
## per_severe_house_cost_burden           6.210033
## per_severe_housing_problems            9.754670
## per_food_insecure                    277.928860
## sum_4_cumulative                      17.488185
## per_limited_access_healthy_food        8.578817
## per_black                             31.744099
## per_asian                             13.307164
## per_am_indian_alaska_native           18.327144
## per_nativeHA_other_pacific_isl         4.242416
## per_hispanic                          21.665807
## per_smokers                          837.002462
## per_physically_inactive               36.594622
## per_excessive_drinking               522.379759
## per_flu_vaccinated                    14.224086
## per_completed_hs                      59.844783
## per_some_college                     123.659869
## traffic_volume                        11.231708
## per_broadband_access                  34.528963
# Plot the type = 2 variable importance of the top-counties
# frequent-mental-distress forest (same measure as the IncNodePurity table
# printed above).
varImpPlot(rf_per_freq_mental_distress_top_counties, type = 2)

# Random-forest regression of per_freq_physical_distress on the 28 predictors
# shared by the other forests in this file (including sum_4_cumulative), fitted
# on the top-counties subset. importance = TRUE stores the variable-importance
# measures that are printed and plotted afterwards.
#
# randomForest() draws random samples and random split candidates, so the RNG
# is seeded to make the printed error and importance values repeatable.
# NOTE(review): the seed value itself is arbitrary.
set.seed(42)
rf_per_freq_physical_distress_top_counties <- randomForest(
  per_freq_physical_distress ~ per_rural + median_household_income +
    population + per_access_to_exercise + per_uninsured +
    primary_care_phys_quartile + mental_health_providers_quartile +
    per_child_poverty + air_pollution_avg_daily_pm2.5 +
    drinking_water_violation_quartile + per_severe_house_cost_burden +
    per_severe_housing_problems + per_food_insecure + sum_4_cumulative +
    per_limited_access_healthy_food + per_black + per_asian +
    per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +
    per_hispanic + per_smokers + per_physically_inactive +
    per_excessive_drinking + per_flu_vaccinated + per_completed_hs +
    per_some_college + traffic_volume + per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)

# Show the call, forest settings, mean of squared residuals and % variance
# explained.
print(rf_per_freq_physical_distress_top_counties)
## 
## Call:
##  randomForest(formula = per_freq_physical_distress ~ per_rural +      median_household_income + population + per_access_to_exercise +      per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile +      per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +      per_severe_house_cost_burden + per_severe_housing_problems +      per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +      per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 0.4535493
##                     % Var explained: 92.83
# Print the type = 2 (IncNodePurity) importance score of every predictor in
# the top-counties frequent-physical-distress forest.
rf_per_freq_physical_distress_top_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             10.343930
## median_household_income              199.171118
## population                            20.342420
## per_access_to_exercise                13.349614
## per_uninsured                         21.554603
## primary_care_phys_quartile             3.090773
## mental_health_providers_quartile       3.945015
## per_child_poverty                    411.208032
## air_pollution_avg_daily_pm2.5         68.466269
## drinking_water_violation_quartile      3.077649
## per_severe_house_cost_burden          10.821128
## per_severe_housing_problems           16.930982
## per_food_insecure                    693.779544
## sum_4_cumulative                      14.645575
## per_limited_access_healthy_food       10.677688
## per_black                             65.978492
## per_asian                              9.302637
## per_am_indian_alaska_native           13.402594
## per_nativeHA_other_pacific_isl         4.164479
## per_hispanic                          38.210654
## per_smokers                          455.724776
## per_physically_inactive               21.722863
## per_excessive_drinking               495.855361
## per_flu_vaccinated                    13.692134
## per_completed_hs                     195.205198
## per_some_college                     218.020418
## traffic_volume                        14.101794
## per_broadband_access                  60.961589
# Plot the type 2 importance values of the physical-distress forest.
# The model is passed by name (not piped) so the default plot title, which is
# derived from the argument expression, remains the object name.
varImpPlot(
  x = rf_per_freq_physical_distress_top_counties,
  type = 2
)

# Random forest with per_adult_obesity as the response, using the same 28
# county-level predictors, in the same order, as the physical-distress model.
# importance = TRUE asks randomForest to assess predictor importance.
# NOTE(review): no set.seed() is visible next to this call; confirm a seed is
# set earlier in the file if the printed figures must be reproducible.
rf_per_adult_obesity_top_counties <- randomForest(
  formula = per_adult_obesity ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured +
    primary_care_phys_quartile + mental_health_providers_quartile +
    per_child_poverty + air_pollution_avg_daily_pm2.5 +
    drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative +
    per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic +
    per_smokers + per_physically_inactive + per_excessive_drinking +
    per_flu_vaccinated + per_completed_hs + per_some_college +
    traffic_volume + per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)

# Fit summary: forest type, tree count, variables tried per split, and the
# mean squared residuals / % variance explained.
print(rf_per_adult_obesity_top_counties)
## 
## Call:
##  randomForest(formula = per_adult_obesity ~ per_rural + median_household_income +      population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile +      mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 +      drinking_water_violation_quartile + per_severe_house_cost_burden +      per_severe_housing_problems + per_food_insecure + sum_4_cumulative +      per_limited_access_healthy_food + per_black + per_asian +      per_am_indian_alaska_native + per_nativeHA_other_pacific_isl +      per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking +      per_flu_vaccinated + per_completed_hs + per_some_college +      traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties,      importance = TRUE) 
##                Type of random forest: regression
##                      Number of trees: 500
## No. of variables tried at each split: 9
## 
##           Mean of squared residuals: 14.45692
##                     % Var explained: 34.68
# Importance measure type 2 (reported as IncNodePurity), one row per predictor.
rf_per_adult_obesity_top_counties %>%
  importance(type = 2) %>%
  print()
##                                   IncNodePurity
## per_rural                             278.05949
## median_household_income               655.68342
## population                            446.01746
## per_access_to_exercise                304.42289
## per_uninsured                         352.06937
## primary_care_phys_quartile            130.37039
## mental_health_providers_quartile      144.88528
## per_child_poverty                     572.30404
## air_pollution_avg_daily_pm2.5         406.12721
## drinking_water_violation_quartile      48.83128
## per_severe_house_cost_burden          198.77955
## per_severe_housing_problems           177.27064
## per_food_insecure                     603.60036
## sum_4_cumulative                      363.91150
## per_limited_access_healthy_food       224.50740
## per_black                             818.31700
## per_asian                             245.11813
## per_am_indian_alaska_native           213.50143
## per_nativeHA_other_pacific_isl         83.95901
## per_hispanic                          565.44308
## per_smokers                           296.27611
## per_physically_inactive              1365.27588
## per_excessive_drinking                395.70774
## per_flu_vaccinated                    371.47379
## per_completed_hs                      222.68117
## per_some_college                      329.73818
## traffic_volume                        352.36274
## per_broadband_access                  360.69014
# Plot the type 2 importance values of the adult-obesity forest.
# The model is passed by name (not piped) so the default plot title, which is
# derived from the argument expression, remains the object name.
varImpPlot(
  x = rf_per_adult_obesity_top_counties,
  type = 2
)